package com.startx.http.system.text;

import java.util.HashSet;
import java.util.List;
import java.util.Set;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CoreSynonymDictionary;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.suggest.Suggester;
import com.hankcs.hanlp.tokenizer.IndexTokenizer;

/**
 * Chinese NLP helpers built on top of the HanLP library.
 * @author minghu.zhang
 */
public class NLProg {

	/** Exclusive upper bound on the synonym-dictionary distance for two entries to count as similar. */
	private static final long MAX_SIMILAR_DISTANCE = 50000;

	/** Separator placed between items in the joined results of keyword/summary/phrase. */
	private static final String SEPARATOR = ",";

	/**
	 * Collects the entries of the given list that are close to at least one other entry
	 * according to {@link CoreSynonymDictionary#distance(String, String)}.
	 * <p>
	 * Every ordered pair of entries (including an entry paired with itself) is compared;
	 * both members of a pair are kept when the distance is strictly between 0 and
	 * {@value #MAX_SIMILAR_DISTANCE}. Pairs whose distance is exactly 0 are not kept.
	 *
	 * @param keyword entries to compare pairwise
	 * @return the set of entries that took part in at least one qualifying pair;
	 *         empty when no pair qualifies
	 */
	public static Set<String> distance(List<String> keyword) {
		Set<String> similarity = new HashSet<>();
		for (String a : keyword) {
			for (String b : keyword) {
				long distance = CoreSynonymDictionary.distance(a, b);
				if (distance > 0 && distance < MAX_SIMILAR_DISTANCE) {
					similarity.add(a);
					similarity.add(b);
				}
			}
		}

		return similarity;
	}

	/**
	 * Search suggestion: feeds every title into a fresh {@link Suggester} and asks it
	 * for suggestions matching the key.
	 *
	 * @param titles candidate sentences added to the suggester
	 * @param key    query passed to {@code Suggester.suggest}
	 * @param size   size argument passed to {@code Suggester.suggest}
	 * @return the list returned by {@code Suggester.suggest(key, size)}
	 */
	public static List<String> suggest(String[] titles,String key, int size) {
		Suggester suggester = new Suggester();
		for (String title : titles) {
			suggester.addSentence(title);
		}

		return suggester.suggest(key, size);
	}

	/**
	 * Text segmentation, delegating to {@code IndexTokenizer.segment}.
	 *
	 * @param content text to segment
	 * @return the terms returned by {@code IndexTokenizer.segment(content)}
	 */
	public static List<Term> tokenizer(String content) {
		return IndexTokenizer.segment(content);
	}

	/**
	 * Extracts keywords via {@code HanLP.extractKeyword} and joins them with commas.
	 *
	 * @param content text to extract from
	 * @param size    size argument passed to {@code HanLP.extractKeyword}
	 * @return the extracted keywords joined by ",", or an empty string when none were extracted
	 */
	public static String keyword(String content,int size) {
		return join(HanLP.extractKeyword(content, size));
	}

	/**
	 * Extracts summary sentences via {@code HanLP.extractSummary} and joins them with commas.
	 *
	 * @param content text to extract from
	 * @param size    size argument passed to {@code HanLP.extractSummary}
	 * @return the extracted sentences joined by ",", or an empty string when none were extracted
	 */
	public static String summary(String content,int size) {
		return join(HanLP.extractSummary(content, size));
	}

	/**
	 * Extracts phrases via {@code HanLP.extractPhrase} and joins them with commas.
	 *
	 * @param content text to extract from
	 * @param size    size argument passed to {@code HanLP.extractPhrase}
	 * @return the extracted phrases joined by ",", or an empty string when none were extracted
	 */
	public static String phrase(String content,int size) {
		return join(HanLP.extractPhrase(content, size));
	}

	/**
	 * Joins the items with {@link #SEPARATOR}. An empty list yields an empty string
	 * (the previous "prepend separator, then substring(1)" approach threw
	 * StringIndexOutOfBoundsException in that case).
	 */
	private static String join(List<String> items) {
		StringBuilder joined = new StringBuilder();
		// Track position with a flag rather than the builder length, so an empty
		// first item is still followed by a separator.
		boolean first = true;
		for (String item : items) {
			if (!first) {
				joined.append(SEPARATOR);
			}
			joined.append(item);
			first = false;
		}
		return joined.toString();
	}
}
